{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Chapter 4 - Introduction to Autoregressive and Automated Methods for Time Series Forecasting - Azure Machine Learning Example\n",
    "\n",
    "## Automated Machine Learning"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# This should be done in a seperate environment as azureml-sdk conflicts with some of our package versions such as statsmodels 0.12\n",
    "# The environment this was tested with is \n",
    "# name: azureml\n",
    "# channels:\n",
    "#   - defaults\n",
    "#   - conda-forge\n",
    "# dependencies:\n",
    "#   - python=3.6\n",
    "#   - matplotlib=3.1.1\n",
    "#   - pandas=1.1.1\n",
    "#   - pip\n",
    "#   - pip:\n",
    "#     - azureml-sdk[automl,notebooks,explain]\n",
    "#     - azuremlftk\n",
    "#     - azure-cli"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import logging\n",
    "import os\n",
    "import warnings\n",
    "import numpy as np\n",
    "import pandas as pd\n",
    "\n",
    "from matplotlib import pyplot as plt\n",
    "from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score\n",
    "from datetime import datetime\n",
    "\n",
    "import azureml.core\n",
    "from azureml.core import Dataset, Experiment, Workspace\n",
    "from azureml.train.automl import AutoMLConfig\n",
    "\n",
    "warnings.showwarning = lambda *args, **kwargs: None"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "experiment_name = 'automatedML-timeseriesforecasting'\n",
    "experiment = Experiment(ws, experiment_name)\n",
    "output = {}\n",
    "output['SDK version'] = azureml.core.VERSION\n",
    "output['Subscription ID'] = ws.subscription_id\n",
    "output['Workspace'] = ws.name\n",
    "output['Resource Group'] = ws.resource_group\n",
    "output['Location'] = ws.location\n",
    "output['Run History Name'] = experiment_name\n",
    "pd.set_option('display.max_colwidth', -1)\n",
    "outputDf = pd.DataFrame(data = output, index = [''])\n",
    "outputDf.T"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 2
   },
   "outputs": [],
   "source": [
    "cts = ws.compute_targets\n",
    "if amlcompute_cluster_name in cts and cts[amlcompute_cluster_name].type == \"AmlCompute\":\n",
    "    found = True\n",
    "    print(\"Found existing compute target.\")\n",
    "    compute_target = cts[amlcompute_cluster_name]\n",
    "\n",
    "if not found:\n",
    "    print(\"Creating a new compute target...\")\n",
    "    provisioning_config = AmlCompute.provisioning_configuration(\n",
    "        vm_size=\"STANDARD_DS12_V2\",\n",
    "        max_nodes=6,\n",
    "    )\n",
    "\n",
    "    compute_target = ComputeTarget.create(\n",
    "        ws, amlcompute_cluster_name, provisioning_config\n",
    "    )\n",
    "\n",
    "print(\"Checking cluster status...\")\n",
    "\n",
    "compute_target.wait_for_completion(\n",
    "    show_output=True, min_node_count=None, timeout_in_minutes=20\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "target_column_name = \"demand\"\n",
    "time_column_name = \"timeStamp\"\n",
    "\n",
    "ts_data = Dataset.Tabular.from_delimited_files(\n",
    "    path=\"https://automlsamplenotebookdata.blob.core.windows.net/automl-sample-notebook-data/nyc_energy.csv\"\n",
    ").with_timestamp_columns(fine_grain_timestamp=time_column_name)\n",
    "\n",
    "ts_data.take(5).to_pandas_dataframe().reset_index(drop=True)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "ts_data = ts_data.time_before(datetime(2017, 10, 10, 5))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "train = ts_data.time_before(datetime(2017, 8, 8, 5), include_boundary=True)\n",
    "train.to_pandas_dataframe().reset_index(drop=True).sort_values(time_column_name).tail(5)\n",
    "\n",
    "test = ts_data.time_between(datetime(2017, 8, 8, 6), datetime(2017, 8, 10, 5))\n",
    "test.to_pandas_dataframe().reset_index(drop=True).head(5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "max_horizon = 24"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "automl_settings = {\n",
    "    \"time_column_name\": time_column_name,\n",
    "    \"max_horizon\": max_horizon,\n",
    "}\n",
    "\n",
    "automl_config = AutoMLConfig(\n",
    "    task=\"forecasting\",\n",
    "    primary_metric=\"normalized_root_mean_squared_error\",\n",
    "    blocked_models=[\"ExtremeRandomTrees\", \"AutoArima\", \"Prophet\"],\n",
    "    experiment_timeout_hours=0.3,\n",
    "    training_data=train,\n",
    "    label_column_name=target_column_name,\n",
    "    compute_target=compute_target,\n",
    "    enable_early_stopping=True,\n",
    "    n_cross_validations=3,\n",
    "    verbosity=logging.INFO,\n",
    "    **automl_settings\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "remote_run = experiment.submit(automl_config, show_output=False)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "remote_run.wait_for_completion()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "best_run, fitted_model = remote_run.get_output()\n",
    "fitted_model.steps"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "featurization_summary = fitted_model.named_steps[\n",
    "    \"timeseriestransformer\"\n",
    "].get_featurization_summary()\n",
    "\n",
    "pd.DataFrame.from_records(featurization_summary)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "X_test = test.to_pandas_dataframe().reset_index(drop=True)\n",
    "y_test = X_test.pop(target_column_name).values"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "y_predictions, X_trans = fitted_model.forecast(X_test)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "lines_to_next_cell": 0
   },
   "outputs": [],
   "source": [
    "from common.forecasting_helper import align_outputs\n",
    "\n",
    "ts_results_all = align_outputs(y_predictions, X_trans, X_test, y_test, target_column_name)\n",
    "\n",
    "from automl.client.core.common import constants\n",
    "from azureml.automl.core._vendor.automl.client.core.common import metrics\n",
    "from matplotlib import pyplot as plt\n",
    "\n",
    "scores = metrics.compute_metrics_regression(\n",
    "    ts_results_all[\"predicted\"],\n",
    "    ts_results_all[target_column_name],\n",
    "    list(constants.Metric.SCALAR_REGRESSION_SET),\n",
    "    None,\n",
    "    None,\n",
    "    None,\n",
    ")\n",
    "\n",
    "print(\"[Test data scores]\\n\")\n",
    "for key, value in scores.items():\n",
    "    print(\"{}:   {:.3f}\".format(key, value))\n",
    "\n",
    "%matplotlib inline\n",
    "test_pred = plt.scatter(ts_results_all[target_column_name], ts_results_all[\"predicted\"], color=\"b\")\n",
    "test_test = plt.scatter(\n",
    "    ts_results_all[target_column_name], ts_results_all[target_column_name], color=\"g\"\n",
    ")\n",
    "plt.legend(\n",
    "    (test_pred, test_test), (\"prediction\", \"truth\"), loc=\"upper left\", fontsize=8\n",
    ")\n",
    "plt.show()"
   ]
  }
 ],
 "metadata": {
  "jupytext": {
   "formats": "ipynb,py"
  },
  "kernelspec": {
   "display_name": "Python 3.6 - AzureML",
   "language": "python",
   "name": "python3-azureml"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.9"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
